
## Towards an integrated early warning approach in UN peacekeeping
## Analysis script
## R versions 4.2.1 - 

pkgs <- c("rstudioapi", "pscl", "randomForest", "ranger", "caret", "PRROC", "foreach", "sf", "spdep", "xtable")
install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = TRUE ) 
invisible(lapply(as.list(pkgs), library, character.only = TRUE))

rm(list = ls()); gc()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
source("DFS_EarlyWarningUN_fun.R")

df1 <- readRDS("data/DFS_EarlyWarningUN.rds")

validation.year <- 2021
test.year <- 2022

new.pred <- FALSE
validation.or.test <- "test"
save.RFs <- TRUE


#### 2. Prediction: Classification ####
if(validation.or.test=="validation"){test.year <- validation.year}
features.spatial <- c("lat", "long")
features.structural <- c("road_dist", "water_dist", "unbase", "voronoi_area_sqkm", features.spatial)
features.seasonality <- c("year", "year2", "year3", "doy", "doy2", "doy3")
features.dv <- c("event_lag1", "event_lag2", "event_lag1_j1", "event_lag2_j1", "event_lag1_j2", "event_lag2_j2")
features.null <- c(features.dv, features.seasonality, features.structural)
features.vci <- c("vci_lag1", "vci_lag1_j1_max", "vci_lag1_j2_max")
features.full <- c(features.vci, features.null)

df2.train <- df1[df1$year<test.year & complete.cases(df1[,features.full]),]; rownames(df2.train) <- 1:nrow(df2.train)
df2.test <- df1[df1$year==test.year & complete.cases(df1[,features.full]),]; rownames(df2.test) <- 1:nrow(df2.test)
df2.test$geometry <- df1$geometry[df1$year==test.year & complete.cases(df1[,features.full])]


## Descriptives and overview
table(df2.train$event)
table(df2.train$fatalities)
plot(density(na.omit(df2.train$vci_lag1)))

descr.tab1 <- t(apply(df2.train[,c("event_num", features.full[!grepl("doy2|doy3|year2|year3", features.full)])], 2, 
                                   \(x){c(
                                     min(x, na.rm = TRUE),
                                     mean(x, na.rm = TRUE), 
                                     median(x, na.rm = TRUE),
                                     max(x, na.rm = TRUE))}))
var.names <- c("Inter-com. conflict event", "VCI (t-1)", "VCI (t-1, j-1)", "VCI (t-1, j-2)", 
               "Inter-com. conflict event (t-1)", "Inter-com. conflict event (t-2)",
               "Inter-com. conflict event (t-1, j-1)", "Inter-com. conflict event (t-2, j-1)",
               "Inter-com. conflict event (t-1, j-2)", "Inter-com. conflict event (t-2, j-2)",
               "Year", "Day of Year",
               "Distance to road", "Distance to water", "Distance to UN base", 
               "Voronoi cell size (km$^2$)", "Latitude", "Longitude")
descr.tab2 <- apply(descr.tab1, 2, \(x){gsub("(\\.)(0{1,})$", "\\1", as.character(round(x, 3)), perl = TRUE)})
descr.tab3 <- data.frame(rownames(descr.tab1), descr.tab2)
colnames(descr.tab3) <- c("Label", "Min.", "Mean", "Median", "Max.")
rownames(descr.tab3) <- var.names
xtable::xtable(descr.tab3, caption = "Training set descriptives", label = "tab:descr")
                                      
                                      

#### 2.1 Rolling test window: df4 ----
n.frames <- length(unique(df2.test$doy))
type <- paste0(validation.or.test, "_", "class")
tune.grid <- 1
unique.doys <- unique(df2.test$doy)
if(validation.or.test=="test"){
  frames <- c(1:n.frames)}else{frames <- seq(1, n.frames, by = 3)}
frames.max <- max(frames)
n.frames.half <- frames[(length(frames)/2)]
if(validation.or.test=="test"){
  n.trees <- 500}else{n.trees <- 400}

file1.name <- paste0("rolling.preds.", test.year, ".", type, ".doy1to", n.frames.half, ".rds")
file2.name <- paste0("rolling.preds.", test.year, ".", type, ".doy", (n.frames.half+1), "to", frames.max, ".rds")

Sys.time()
if(new.pred==TRUE){
  rm(df2.train, df2.test)
  set.seed(1313)
  rolling.preds <- list()
  for(i in frames){
    for(tune in tune.grid){
      df4.train <- df1[!(df1$year==test.year & df1$doy >= unique.doys[i]) & !(df1$year>test.year) & complete.cases(df1[,features.full]),]
      rownames(df4.train) <- 1:nrow(df4.train)
      df4.test <- df1[df1$year == test.year & df1$doy == unique.doys[i] & complete.cases(df1[,features.full]),]
      df4.test$geometry <- df1$geometry[df1$year == test.year & df1$doy == unique.doys[i] & complete.cases(df1[,features.full])]
      rownames(df4.test) <- 1:nrow(df4.test)
      
      class.size.1 <- sum(df4.train$event==1)
      class.size.0 <- sum(df4.train$event==0)
      # class weights for split weighting:
      class.weight.1 <- (nrow(df4.train)/(2*class.size.1)) 
      class.weight.0 <- (nrow(df4.train)/(2*class.size.0))
      # apply equal case weights (with test set == 0 for ranger be able to use holdout set for OOS var importance):
      df4.train$case.weights <- 1
      df4.test$case.weights <- 0
      df4 <- rbind(df4.train, df4.test)
      
      rf1 <- ranger(x = df4[,features.full], y = df4$event, probability = TRUE, verbose = FALSE, num.trees = n.trees,
                    min.node.size = 1, min.bucket = 1, mtry = floor(sqrt(length(features.full))),
                    replace = TRUE, importance = "permutation", 
                    class.weights = c(class.weight.0, class.weight.1),
                    holdout = TRUE, case.weights = df4$case.weights)
      df4.test$event.prob.full <- predict(rf1, data = df4.test[,colnames(df4.test)%in%c("event", features.full)])$predictions[,2]
      
      rf2 <- ranger(x = df4[,features.null], y = df4$event, probability = TRUE, verbose = FALSE, num.trees = n.trees,
                    min.node.size = 1, min.bucket = 1, mtry = floor(sqrt(length(features.null))),
                    replace = TRUE, importance = "permutation", 
                    class.weights = c(class.weight.0, class.weight.1),
                    holdout = TRUE, case.weights = df4$case.weights)
      df4.test$event.prob.null <- predict(rf2, data = df4.test[,colnames(df4.test)%in%c("event", features.null)])$predictions[,2]
      
      tune.i <- which(tune.grid%in%tune) + ((which(frames%in%i)-1)*length(tune.grid))
      if(save.RFs==TRUE){ rolling.preds[[tune.i]] <- list(rf1 = rf1, rf2 = rf2, data = df4.test) }else{ rolling.preds[[tune.i]] <- df4.test}
      names(rolling.preds)[tune.i] <- paste0(tune, "tune/", type, ": ", unique.doys[i])
      
      print(paste0(Sys.time(), ": ", tune, "tune/", type, ": ", i, "/", frames.max))
      if(i==n.frames.half){saveRDS(rolling.preds, file = file1.name); rolling.preds <- list(); gc()}
      if(i==frames.max){saveRDS(rolling.preds, file = file2.name)}
    }}
  rm(rolling.preds); gc()
}

# which features to extract from results:
vars.of.interest <- c("geometry", "id", "doy", "time", "event", "vci", "event.prob.full", "event.prob.null", "unbase", "voronoi_area_sqkm")
# retrieve first half of rolling.preds:
rolling.preds <- readRDS(file1.name) 
preds1 <- do.call("rbind", lapply(rolling.preds, if(save.RFs==TRUE){function(x){ x[["data"]][,vars.of.interest] }}else{function(x){ x[,vars.of.interest] }}) )
if(save.RFs==TRUE){ rf.imp <- do.call("rbind", lapply(rolling.preds, function(x){ ranger::importance(x[["rf1"]]) } )) }
# retrieve and add second half of rolling.preds:
rm(rolling.preds); gc()
rolling.preds <- readRDS(file2.name) 
all.preds <- rbind(preds1, do.call("rbind", lapply(rolling.preds[!sapply(rolling.preds, is.null)], if(save.RFs==TRUE){function(x){ x[["data"]][,vars.of.interest] }}else{function(x){ x[,vars.of.interest] }}) ))
if(save.RFs==TRUE){ rf.imp <- rbind(rf.imp, do.call("rbind", lapply(rolling.preds[!sapply(rolling.preds, is.null)], function(x){ ranger::importance(x[["rf1"]]) } ))) }
if(!all(c(names(table(all.preds$doy))==unique.doys[frames], if(save.RFs==TRUE){gsub("^.{1,}\\s", "", row.names(rf.imp))==unique.doys[frames]}))){
  "Careful, something went wrong in binding the two sets of results."
}

sort(all.preds$event.prob.full, decreasing = TRUE)[1:50]; mean(all.preds$event.prob.full)+3*sd(all.preds$event.prob.full)
sum(all.preds$event.prob.full[all.preds$event==1])/sum(all.preds$event.prob.full)
sort(all.preds$event.prob.null, decreasing = TRUE)[1:50]; mean(all.preds$event.prob.null)+3*sd(all.preds$event.prob.null)
sum(all.preds$event.prob.null[all.preds$event==1])/sum(all.preds$event.prob.null)
hist.color <- rgb(red = 0, green = 0, blue = 0, alpha = 0.3)
# pdf(file = paste0("figures/separation_plots.", type, ".pdf"), width = 10, height = 8)
par(mar = c(5.1, 4.1, 4.1, 4.1), mfrow = c(2,1))
plot(1:length(all.preds$event.prob.full), sort(all.preds$event.prob.full, decreasing = FALSE), type = "l", ylim = c(0,0.06), bty = "n", yaxt = "n",
     xlab = "Full model out-of-sample pred. probabilities and actuals", ylab = "Predicted probability")
axis(2, at = seq(0, 0.06, by = 0.01), labels = c(".00",".01",".02",".03",".04",".05",".06"), las = 2)
grid(nx = 0, ny = NULL, col = "lightgray", lty = 3)
par(new = TRUE)
hist(which(all.preds$event[order(all.preds$event.prob.full, decreasing = FALSE)]==1), ylim = c(0,500), 
     col = hist.color, border = rgb(red = 0, green = 0, blue = 0, alpha = 0), 
     axes = FALSE, bty = "n", xlab = "", ylab = "", main = "", xaxt="n", freq = TRUE, breaks = 20)
axis(4, at = seq(0,400,by=100), labels = seq(0,400,by=100), col = hist.color, col.axis = hist.color, las = 2)
mtext("Event frequency", side = 4, line = 3, col = hist.color)
# segments(x0 = which(all.preds$event[order(all.preds$event.prob.full, decreasing = FALSE)]==1), y0 = -0.01, y1 = -0.0005, col = "red")
plot(1:length(all.preds$event.prob.null), sort(all.preds$event.prob.null, decreasing = FALSE), type = "l", ylim = c(0,0.06), bty = "n", yaxt = "n",
     xlab = "Null model out-of-sample pred. probabilities and actuals", ylab = "Predicted probability")
axis(2, at = seq(0, 0.06, by = 0.01), labels = c(".00",".01",".02",".03",".04",".05",".06"), las = 2)
grid(nx = 0, ny = NULL, col = "lightgray", lty = 3)
par(new = TRUE)
hist(which(all.preds$event[order(all.preds$event.prob.null, decreasing = FALSE)]==1), ylim = c(0,500),
     col = hist.color, border = rgb(red = 0, green = 0, blue = 0, alpha = 0), 
     axes = FALSE, bty = "n", xlab = "", ylab = "", main = "", xaxt="n", freq = TRUE, breaks = 20)
axis(4, at = seq(0,400,by=100), labels = seq(0,400,by=100), col = hist.color, col.axis = hist.color, las = 2)
mtext("Event frequency", side = 4, line = 3, col = hist.color)
# segments(x0 = which(all.preds$event[order(all.preds$event.prob.null, decreasing = FALSE)]==1), y0 = -0.01, y1 = -0.0005, col = "red")
# gist: full has more TP/assigns higher prob to actual realisations than null, but also has more FN/has several actual events that slip through the net.
par(mar = c(5.1, 4.1, 4.1, 2.1), mfrow = c(1,1))
dev.off()

# NS tuning:
# all.preds$ns <- gsub("^(\\d{1,})(.{1,})$", "\\1", rownames(all.preds), perl = TRUE)
# by(all.preds, all.preds$ns, \(x){ list(unique(x$ns), full = mean((x$event.prob.full-as.numeric(x$event=="1"))^2)) })
# by(all.preds, all.preds$ns, \(x){ list(unique(x$ns), full = PRROC::pr.curve(scores.class0 = x$event.prob.full[x$event==1], scores.class1 = x$event.prob.full[x$event==0], curve = TRUE)) })
# all.preds <- all.preds[all.preds$ns==50,]

# Brier
# Brier vs AUC: https://stats.stackexchange.com/questions/489106/brier-score-and-extreme-class-imbalance
mean((all.preds$event.prob.full-as.numeric(all.preds$event=="1"))^2) # 0.004112837
mean((all.preds$event.prob.null-as.numeric(all.preds$event=="1"))^2) # 0.00422752
# Brier by doy:
print(brier.doy <- by(all.preds, all.preds$doy, \(x){ list(full = mean((x$event.prob.full-as.numeric(x$event=="1"))^2),
                                                           null = mean((x$event.prob.null-as.numeric(x$event=="1"))^2) ) }))

# AUroC
print( AUroC.full <- PRROC::roc.curve(scores.class0 = all.preds$event.prob.full[all.preds$event==1],
                                      scores.class1 = all.preds$event.prob.full[all.preds$event==0], curve = TRUE))
print( AUroC.null <- PRROC::roc.curve(scores.class0 = all.preds$event.prob.null[all.preds$event==1],
                                      scores.class1 = all.preds$event.prob.null[all.preds$event==0], curve = TRUE))

# AUprC
print( AUprC.full <- PRROC::pr.curve(scores.class0 = all.preds$event.prob.full[all.preds$event==1],
                                     scores.class1 = all.preds$event.prob.full[all.preds$event==0], curve = TRUE))
print( AUprC.null <- PRROC::pr.curve(scores.class0 = all.preds$event.prob.null[all.preds$event==1],
                                     scores.class1 = all.preds$event.prob.null[all.preds$event==0], curve = TRUE))
# alternative implementation: PRROC::pr.curve(scores.class0 = all.preds$event.prob.full, weights.class0 = as.numeric(all.preds$event==1), curve = TRUE)

# AUprC by doy:
auprc.doy <- by(all.preds, all.preds$doy, \(x){ list(full = PRROC::pr.curve(scores.class0 = x$event.prob.full[x$event==1],
                                                                                  scores.class1 = x$event.prob.full[x$event==0], curve = TRUE),
                                                           null = PRROC::pr.curve(scores.class0 = x$event.prob.null[x$event==1],
                                                                                  scores.class1 = x$event.prob.null[x$event==0], curve = TRUE)) })
auprc.doy.integrals <- lapply(auprc.doy, \(x){lapply(x, \(y){y$auc.integral})})

# https://weatherspark.com/h/y/32134/2022/Historical-Weather-during-2022-in-Bamako-Mali#Figures-Summary
rain.start <- as.numeric(strftime("15/06/2022", format = "%j")) 
rain.end <- as.numeric(strftime("15/09/2022", format = "%j"))
rain.x <- c(rain.start/max(unique.doys), rain.end/max(unique.doys)) * length(frames)

for(i in 1:2){ # i <- 2
  plotted.metric <- list(brier.doy, auprc.doy.integrals)[[i]]
  metric.name <- c("Brier score", "Area under the PR curve")[i]
  at.x <- 1:length(frames)
  at.y <- unlist(lapply(plotted.metric, \(x){x["full"]}))
  rounded.max.y <- if(i==1){ round(max(at.y),3) }else{ round(max(at.y),1) }
  pdf(file = paste0("figures/", c("brier.", "prAUC.")[i], type, ".pdf"), width = 10, height = 7)
  par(mar = c(5.1, 4.5, 4.1, 4.1), mfrow = c(1,1), xpd = TRUE)
  plot(at.x, at.y, 
       type = "b", ylim = c(0,rounded.max.y), bty = "n", yaxt="n", xaxt="n", xlab = paste0("Day of year, ", test.year), ylab = "")
  axis(1, at.x, labels = unique.doys[frames], cex.axis = 0.8)
  axis(2, seq(0, rounded.max.y, length = 7), labels = sprintf("%.3f", seq(0, rounded.max.y, length = 7)), cex.axis = 0.95, las = 2)
  mtext(metric.name, side = 2, line = 3.5)
  lines(at.x, unlist(lapply(plotted.metric, \(x){x["null"]})), type = "b", col = "gray80")
  legend("topleft", fill = c("black", "gray"), border = c("black", "gray"), legend = c("Full model", "Null model"), bty = "n")
  polygon(x = c(rain.x, rev(rain.x)), y = c(0-(0.04*rounded.max.y),0-(0.04*rounded.max.y),rounded.max.y,rounded.max.y), 
          col = rgb(0,0.7,1,0.09), border = NA)
  text(mean(rain.x), rounded.max.y+(0.03*rounded.max.y), "Rainy season", col = rgb(0,0.7,1,1))
  par(new = TRUE)
  vci.means <- aggregate(all.preds$vci, list(time = all.preds$time), mean, na.rm = TRUE)$x
  plot(at.x, ylim = c(0,(max(vci.means)*5)), col = NULL, axes = FALSE, bty = "n", xlab = "", ylab = "", xaxt="n")
  segments(x0 = (at.x+0.1), y0 = 0, y1 = vci.means, col = "darkgreen")
  # barplot(vci.means ~ at.x, col = "white", axes = FALSE, bty = "n", xlab = "", ylab = "", xaxt="n", xpd = TRUE, space = 0.5, 
  #         width = 0.5, xlim = c(1,n.frames), ylim = c(0,(max(vci.means)*5)))
  axis(4, at = seq(0,100,by=20), labels = seq(0,100,by=20), col = "darkgreen", col.axis = "darkgreen", las = 2, cex.axis = 0.65)
  text(max(at.x)+1.1, y = -32, "Mean VCI", col = "darkgreen", cex = 0.65, srt = 270)
  par(new = TRUE)
  number.events <- aggregate(as.numeric(all.preds$event==1), list(time = all.preds$time), sum)$x
  plot(at.x, ylim = c(0,(max(number.events)*5)), col = NULL, axes = FALSE, bty = "n", xlab = "", ylab = "", xaxt="n")
  segments(x0 = (at.x-0.1), y0 = 0, y1 = number.events, col = "red")
  axis(4, at = seq(0,60,by=20), labels = seq(0,60,by=20), col = "red", col.axis = "red", las = 2, cex.axis = 0.65, line = 2.3)
  text(max(at.x)+2.4, y = -38, "Number of events", col = "red", cex = 0.65, srt = 270)
  # rain season: canopy etc is "reset" and from there, path dependencies take over again
  dev.off()
  par(mar = c(5.1, 4.1, 4.1, 2.1), mfrow = c(1,1))
}

# P-R curve
plot(AUprC.full)
plot(AUprC.null)
hold.pr <- pr.fun(event.prob.df = all.preds[,c("event", "event.prob.full", "event.prob.null")])

# Calculating AUprC by hand:
library(Bolstad2)
sintegral(hold.pr$recall.1, hold.pr$precision.1)$int
sintegral(hold.pr$recall.2, hold.pr$precision.2)$int

# pdf(paste0("figures/PRcurve1.", type, ".pdf"), width = 8, height = 6)
par(mfrow = c(1,1), mar = c(5.1, 4.1, 2.1, 2.1), oma = c(0, 0, 0, 0) )
plot( hold.pr$threshold, hold.pr$precision.1, type = "l", ylim = c(0,1), bty = "n",
      ylab = "Precision and recall", xlab = "Classification threshold")
lines( hold.pr$threshold, hold.pr$recall.1, lty = 2 )
#abline(v = hold.pr$threshold[hold.pr$F1.1==max(na.omit(hold.pr$F1.1))], col = "red")
legend("topright", bty = "n",
       legend = c("Recall (full)", "Precision (full)", "Recall (null)", "Precision (null)"),#, "F1 max (1)", "F1 max (2)"), 
       lty = c(2,1,2,1),#,1,1), 
       col = c("black", "black", "gray80", "gray80"))#, "red", "orange"))
lines( hold.pr$threshold, hold.pr$precision.2, col = "gray80" )
lines( hold.pr$threshold, hold.pr$recall.2, lty = 2,  col = "gray80" )
#abline(v = hold.pr$threshold[hold.pr$F1.2==max(na.omit(hold.pr$F1.2))], col = "orange")
par(mfrow = c(1,1), mar = c(5.1, 4.1, 4.1, 2.1), oma = c(0, 0, 0, 0) )
dev.off()
# pdf(paste0("figures/PRcurve2.", type, ".pdf"), width = 8, height = 6)
par(mfrow = c(1,1), mar = c(5.1, 4.1, 2.1, 2.1), oma = c(0, 0, 0, 0) )
plot( sort(hold.pr$recall.1), hold.pr$precision.1[order(hold.pr$recall.1)], type = "l",
      ylab = "Precision", xlab = "Recall", ylim = c(0,1), bty = "n")
lines(sort(hold.pr$recall.2), hold.pr$precision.2[order(hold.pr$recall.2)], col = "gray80")
#points(hold.pr$recall.1[hold.pr$F1.1==max(na.omit(hold.pr$F1.1))], 
#       hold.pr$precision.1[hold.pr$F1.1==max(na.omit(hold.pr$F1.1))], pch = 5)
#points(hold.pr$recall.2[hold.pr$F1.2==max(na.omit(hold.pr$F1.2))], 
#       hold.pr$precision.2[hold.pr$F1.2==max(na.omit(hold.pr$F1.2))], pch = 4)
legend("topright", bty = "n",
       legend = c(paste0("Full model (AUprC = ", round(AUprC.full[[2]],3), ")"), 
                  paste0("Null model (AUprC = ", round(AUprC.null[[2]],3), ")")),
                  #paste0("F1 maximum full (", round(max(na.omit(hold.pr$F1.1)),3),")"), 
                  #paste0("F1 maximum null (", round(max(na.omit(hold.pr$F1.2)),3),")")), 
       fill = c("black", "gray80"),
       border = NA,
       pch = c(NA,NA))#,5,4))
par(mfrow = c(1,1), mar = c(5.1, 4.1, 4.1, 2.1), oma = c(0, 0, 0, 0) )
dev.off()
# gist: full has higher precision/TP over FP rate (discriminates better when predicting/fewer FPs), 
# while null has better recall/TP over FN rate (catches more relevant cases/actuals/fewer FNs).

# pdf(paste0("figures/PRcurve3.", type, ".pdf"), width = 8, height = 7)
F.max.rounded <- (ceiling(max(hold.pr[,grepl("^F", colnames(hold.pr))], na.rm = TRUE)*10)/10)
plot(hold.pr$threshold, hold.pr$F1.1, type = "l", bty = "n", yaxt = "n",
     ylim = c(0, F.max.rounded),
     ylab = expression(paste("F", beta, " score")),
     xlab = "Classification threshold")
axis(2, seq(0,F.max.rounded,0.05), format(seq(0,F.max.rounded,0.05), nsmall = 2), las = 2)
lines(hold.pr$threshold, hold.pr$F2.1, lty = 3)
lines(hold.pr$threshold, hold.pr$F05.1, lty = 2)
lines(hold.pr$threshold, hold.pr$F1.2, col = "gray70")
lines(hold.pr$threshold, hold.pr$F2.2, col = "gray70", lty = 3)
lines(hold.pr$threshold, hold.pr$F05.2, col = "gray70", lty = 2)
legend("topright", 
       legend = c("Full model", "Null model", expression(paste(beta, " = 0.5")), expression(paste(beta, " = 1")), expression(paste(beta, " = 2")) ), 
       fill = c("black", "gray80", NA, NA, NA), 
       border = NA,
       lty = c(FALSE, FALSE, 2, 1, 3),
       bty = "n", cex = 0.9)
text(1, 0.185, pos = 2, cex = 0.85,
    paste0("F(b=0.5) maximum full = ", round(max(na.omit(hold.pr$F05.1)),3),"
            F(b=0.5) maximum null = ", round(max(na.omit(hold.pr$F05.2)),3),"\n
            F(b=1) maximum full = ", round(max(na.omit(hold.pr$F1.1)),3),"
            F(b=1) maximum null = ", round(max(na.omit(hold.pr$F1.2)),3),"\n
            F(b=2) maximum full = ", round(max(na.omit(hold.pr$F2.1)),3),"
            F(b=2) maximum null = ", round(max(na.omit(hold.pr$F2.2)),3)))
dev.off()


### Feature importance
importance1 <- rf.imp
importance2 <- importance1[,!grepl("lat|long|doy|year", colnames(importance1))]
var.names.clean <- c(vci_lag1 = "VCI (t-1)", vci_lag1_j1_max = "Neighboring VCIs (t-1, j-1)",  vci_lag1_j2_max = "Neighboring VCIs (t-1, j-2)", 
                     event_lag1 = "Conflict history (t-1)", event_lag2 = "Conflict history (t-2)", 
                     event_lag1_j1 = "Neighboring conflict history (t-1, j-1)", event_lag2_j1 = "Neighboring conflict history (t-2, j-1)", 
                     event_lag1_j2 = "Neighboring conflict history (t-1, j-2)", event_lag2_j2 = "Neighboring conflict history (t-2, j-2)",     
                     road_dist = "Distance to next large road", water_dist = "Distance to next water body", unbase = "Distance to next UN base",
                     voronoi_area_sqkm = "Voronoi size")
if(!all(colnames(importance2)%in%names(var.names.clean))){stop("Careful, variables names do not match.")}
importance3 <- importance2
colnames(importance3) <- var.names.clean[match(names(var.names.clean), colnames(importance2))] 
apply(importance3, 1, \(x){colnames(importance3)[which(x==max(x))]})
apply(importance3, 1, \(x){colnames(importance3)[order(x, decreasing = TRUE)]})
var.imp.mean <- sort(apply(importance3, 2, mean), decreasing = FALSE)
max.var.imp.mean <- round(max(var.imp.mean),3)
# pdf(file = paste0("figures/VarImp.", type, ".pdf"), width = 9, height = 7)
par(mar = c(5.1, 14.1, 4.1, 2.1))
barplot(var.imp.mean, horiz = TRUE, col = "white", xlim = c(0,max.var.imp.mean), 
        las = 2, cex.names = 0.8, xaxt = "n", xlab = "Feature permutation importance")
axis(1, seq(0,max.var.imp.mean,0.001), seq(0,max.var.imp.mean,0.001))
par(mar = c(5.1, 4.1, 4.1, 2.1))
dev.off()
var.imp.doy <- importance3[,!grepl("event_lag", names(colnames(importance3)))]
max.var.imp.doy <- round(max(var.imp.doy),3)
# pdf(file = paste0("figures/VarImp2.", type, ".pdf"), width = 9, height = 8)
par(mar = c(5.1, 5.1, 4.1, 2.1))
print( bp1 <- barplot(t(var.imp.doy), beside = TRUE, space = c(0, 3), 
                      ylim = c(0,max.var.imp.doy), las = 2, cex.names = 0.8, yaxt = "n", xaxt = "n") )
axis(1, apply(bp1, 2, mean), unique.doys, las = 2)
mtext("Day of year, 2022", 1, 3)
axis(2, seq(0,max.var.imp.doy,0.001), seq(0,max.var.imp.doy,0.001), las = 2)
mtext("Feature permutation importance", 2, 3.8)
legend("topleft", fill = gray.colors(7), legend = colnames(var.imp.doy), bty = "n")
par(mar = c(5.1, 4.1, 4.1, 2.1))
dev.off()
# pdf(file = paste0("figures/VarImp3.", type, ".pdf"), width = 10, height = 7)
line.chart.y.offset <- (0.02*max.var.imp.doy)
par(mar = c(5.1, 5.1, 4.1, 2.1), xpd = TRUE)
matplot(var.imp.doy, type = "l", lty = 1:ncol(var.imp.doy), col = gray.colors(ncol(var.imp.doy)),
        yaxt = "n", xaxt = "n", bty = "n", ylab = "", xlab = "Day of year, 2022")
axis(1, 1:n.frames, unique.doys, las = 1, cex.axis = 0.85)
axis(2, seq(0,max.var.imp.doy,0.001), seq(0,max.var.imp.doy,0.001), las = 2, cex.axis = 0.8)
mtext("Feature permutation importance", 2, 3.8)
text(n.frames, ((var.imp.doy[nrow(var.imp.doy),]+line.chart.y.offset)+c(rep(-2*line.chart.y.offset,2),rep(0,5))), colnames(var.imp.doy), 
     col = gray.colors(ncol(var.imp.doy), end = 0.7), cex = 0.7)
legend("topleft", legend = colnames(var.imp.doy), lty = 1:ncol(var.imp.doy), col = gray.colors(ncol(var.imp.doy)), bty = "n", cex = 0.85)
par(mar = c(5.1, 4.1, 4.1, 2.1))
dev.off()





### PDP
# library(pdp)
# rf.out <- rolling.preds[!sapply(rolling.preds, is.null)][[1]]$rf1
# pdp1 <- partial(rf.out, pred.var = c("vci_lag1", "vci_lag1_j1_max"), chull = TRUE)
# pdp2 <- autoplot(pdp1, contour = TRUE, legend.title = "Partial\ndependence")


### Maps
rm(rolling.preds); gc()
n.color.bins <- 15
all.preds$event.prob.bins.full <- cut(all.preds$event.prob.full, n.color.bins)
all.preds$event.prob.bins.null <- cut(all.preds$event.prob.null, n.color.bins)

## Add separate zero interval: 
# levels(all.preds$event.prob.bins.full) <- c(gsub("\\(.{1,},", "[0,", levels(all.preds$event.prob.bins.full)[1]), levels(all.preds$event.prob.bins.full)[-1], "(0%]")
# all.preds$event.prob.bins.full[all.preds$event.prob.full<=0] <- "(0%]"
# all.preds$event.prob.bins.full <- relevel(all.preds$event.prob.bins.full, "(0%]")
# levels(all.preds$event.prob.bins.null) <- c(gsub("\\(.{1,},", "[0,", levels(all.preds$event.prob.bins.null)[1]), levels(all.preds$event.prob.bins.null)[-1], "(0%]")
# all.preds$event.prob.bins.null[all.preds$event.prob.null<=0] <- "(0%]"
# all.preds$event.prob.bins.null <- relevel(all.preds$event.prob.bins.null, "(0%]")
# n.color.bins <- n.color.bins + 1

all.preds$event.prob.bins.full.col <- as.factor( as.numeric( all.preds$event.prob.bins.full ))
all.preds$event.prob.bins.null.col <- as.factor( as.numeric( all.preds$event.prob.bins.null ))
bin.legend <- c(levels(all.preds$event.prob.bins.full)[1], 
                paste0(c("[0", paste0("(", round((as.numeric(gsub("^(.)(.{1,})(,.{1,}]$)", "\\2", levels(all.preds$event.prob.bins.full)[-1], 
                                                                  perl = TRUE))*100),1)[-1])), "%, ",
                     round((as.numeric(gsub("^(.{1,},)(.{1,})(])$", "\\2", levels(all.preds$event.prob.bins.full)[-1], perl = TRUE))*100),1), "%]"))
all.preds$event.col <- ifelse(all.preds$event==0, 1, n.color.bins)

### Change bounding box if necessary:
# bbox <- st_bbox(all.preds.frame$geometry)
# x.range <- bbox$xmax - bbox$xmin 
# bbox[1] <- bbox[1] + (0.01 * x.range)
# bbox[3] <- bbox[3] - (0.01 * x.range)
# bbox <- st_as_sfc(bbox)
# all.preds.frame$geometry <- st_crop(all.preds.frame$geometry, bbox)

# pdf(file = "figures/pred.map.rolling.2022.full.pdf", width = 6, height = 9)
par(mfrow = c(ceiling(n.frames/4), 4),
    oma = c(0,0,0,0),
    mar = c(1,0,0,0)+0.5)
for(i in 1:n.frames){
  all.preds.frame <- all.preds[all.preds$doy==unique.doys[i],]
  plot(all.preds.frame$geometry,
       col = heat.colors(n.color.bins, rev = TRUE)[all.preds.frame$event.prob.bins.full.col],
       border = heat.colors(n.color.bins, rev = TRUE)[all.preds.frame$event.prob.bins.full.col],
       main = "", axes = FALSE, graticule = TRUE)
  mtext(paste0("Day of year: ", unique.doys[i]), 1, line = 0, cex = 0.65)
  plot(df1$geometry[df1$time=="1"], col = NA, border = "gray50", lwd = 0.03,
       main = "", add = TRUE)
   # par(xpd=TRUE)
   # legend(x = 5, cex = 0.8,
   #        title = "\n Predicted probability \n of inter-communal violence",
   #        fill = heat.colors(n.color.bins, rev = TRUE),
   #        legend = bin.legend,
   #        bty = "n")
   # par(xpd=FALSE)
}
dev.off()
par(mfrow = c(1,1))

# pdf(file = "figures/pred.map.rolling.2022.null.pdf", width = 6, height = 9)
par(mfrow = c(ceiling(n.frames/4), 4),
    oma = c(0,0,0,0),
    mar = c(1,0,0,0)+0.5)
for(i in 1:n.frames){
  all.preds.frame <- all.preds[all.preds$doy==unique.doys[i],]
  plot(all.preds.frame$geometry,
       col = heat.colors(n.color.bins, rev = TRUE)[all.preds.frame$event.prob.bins.null.col],
       border = heat.colors(n.color.bins, rev = TRUE)[all.preds.frame$event.prob.bins.null.col],
       main = "", axes = FALSE, graticule = TRUE)
  mtext(paste0("Day of year: ", unique.doys[i]), 1, line = 0, cex = 0.65)
  plot(df1$geometry[df1$time=="1"], col = NA, border = "gray50", lwd = 0.03,
       main = "", add = TRUE)
  #  par(xpd=TRUE)
  #  legend("topright", cex = 0.8,
  #         title = "\n Predicted probability \n of inter-communal violence",
  #         fill = heat.colors(n.color.bins, rev = TRUE),
  #         legend = bin.legend,
  #         bty = "n")
  #  par(xpd=FALSE)
}
dev.off()
par(mfrow = c(1,1))

# pdf(file = "figures/pred.map.2022.observed.pdf", width = 6, height = 9)
par(mfrow = c(ceiling(n.frames/4), 4),
    oma = c(0,0,0,0),
    mar = c(1,0,0,0)+0.5)
for(i in 1:n.frames){
  all.preds.frame <- all.preds[all.preds$doy==unique.doys[i],]
  plot(all.preds.frame$geometry,
       col = heat.colors(n.color.bins, rev = TRUE)[all.preds.frame$event.col],
       border = heat.colors(n.color.bins, rev = TRUE)[all.preds.frame$event.col],
       main = "", axes = FALSE, graticule = TRUE)
  mtext(paste0("Day of year: ", unique.doys[i]), 1, line = 0, cex = 0.65)
  plot(df1$geometry[df1$time=="1"], col = NA, border = "gray50", lwd = 0.03,
       main = "", add = TRUE)
}
dev.off()
par(mfrow = c(1,1))

## Selecting only three frames where all three relevant maps (observed, full, null)
## can be easily compared:
# pdf(file = "figures/pred.map.2022.comp.pdf", width = 5, height = 6)
comp.frames <- c(8,14,21)
n.comp.frames <- length(comp.frames)
comp.vars <- c("event.col", "event.prob.bins.full.col", "event.prob.bins.null.col")
comp.titles <- c("Observed events", "Pred. prob. (full)", "Pred. prob. (null)")
frame.titles <- paste0( format(as.Date(unique.doys[comp.frames], origin = "2022-01-01"), "%d/%b/%y"), " - ", 
                        format(as.Date(unique.doys[(comp.frames+1)], origin = "2022-01-01"), "%d/%b/%y") )
par(mfrow = c(n.comp.frames, length(comp.vars)),
    oma = c(2,2,0,0),
    mar = c(0.5,0.5,1,0.5)+0.5)
for(i in 1:n.comp.frames){
  for(j in 1:length(comp.vars)){
    all.preds.frame <- all.preds[all.preds$doy==unique.doys[comp.frames][i],]
    if(i==1){
      plot(all.preds.frame$geometry,
           col = heat.colors(n.color.bins, rev = TRUE)[ all.preds.frame[[ comp.vars[j] ]] ],
           border = heat.colors(n.color.bins, rev = TRUE)[ all.preds.frame[[ comp.vars[j] ]] ],
           main = comp.titles[j], axes = FALSE, graticule = TRUE)
    }else{
      plot(all.preds.frame$geometry,
           col = heat.colors(n.color.bins, rev = TRUE)[ all.preds.frame[[ comp.vars[j] ]] ],
           border = heat.colors(n.color.bins, rev = TRUE)[ all.preds.frame[[ comp.vars[j] ]] ],
           main = "", axes = FALSE, graticule = TRUE)}
    mtext(side=2, line=1.5, 
          ifelse(j==1, frame.titles[i], ""), 
          font = 2, cex = 0.8)
    plot(df1$geometry[df1$time=="101"], col = NA, border = alpha("gray50", 0.4), lwd = 0.01,
         main = "", add = TRUE)
  }
}
dev.off()
par(mfrow = c(1,1))


## Same as above, but with heat densities: 
library(gridExtra)
comp.frames <- c(8,14,21)
n.comp.frames <- length(comp.frames)
comp.vars <- c("event.col", "event.prob.bins.full.col", "event.prob.bins.null.col")
comp.vars.raw <- c("event", "event.prob.full", "event.prob.null")
comp.titles <- c("Observed events", "Pred. prob. (full model)", "Pred. prob. (null model)")
frame.titles <- paste0( format(as.Date(unique.doys[comp.frames], origin = "2022-01-01"), "%d.%m.%y"), "-", 
                        format(as.Date(unique.doys[(comp.frames+1)], origin = "2022-01-01"), "%d.%m.%y") )
all.preds <- data.frame(all.preds, setNames(as.data.frame(st_coordinates(st_centroid(all.preds$geometry))), c("lon", "lat")))
ggmap1 <- ggplot(all.preds$geometry[all.preds$doy==1]) + geom_sf(lwd = 0.05, color = "gray30")
gg.list <- list()
for(i in 1:n.comp.frames){
  for(j in 1:length(comp.vars)){
    all.preds.frame <- all.preds[all.preds$doy==unique.doys[comp.frames][i],]
    #all.preds.frame <- all.preds.frame[as.numeric(as.character(all.preds.frame[[ comp.vars.raw[j] ]]))>0,]
    all.preds.frame <- all.preds.frame[as.numeric(as.character(all.preds.frame[[ comp.vars[j] ]]))>1,]
    temp.density.multiplier <- as.numeric(as.character(all.preds.frame [[ comp.vars[j] ]] ))# * as.numeric(cut(log(all.preds.frame$voronoi_area_sqkm+1), 3))^2
    all.preds.frame <- all.preds.frame[rep(row.names(all.preds.frame), temp.density.multiplier), ]
    temp.rownames.multiplied <- grepl("\\s\\d{1,}\\.\\d{1,}\\.\\d{1,}$", rownames(all.preds.frame))
    all.preds.frame[temp.rownames.multiplied, c("lat", "lon")] <- apply(all.preds.frame[temp.rownames.multiplied,c("lat", "lon")], 2, jitter, 100)
    gg.list[[j+((i-1)*3)]] <- ggmap1 + 
      stat_density_2d(data = all.preds.frame,
                      aes(x = lon, y = lat, fill = after_stat(level)),
                      alpha = .9, n = 200, adjust = 0.2,#ifelse(j==1, 0.05, 0.02), 
                      geom="density_2d_filled") + 
      theme(legend.position = "none",
            axis.text.x = element_blank(),
            axis.text.y = element_blank(),
            axis.title.y=element_blank(),
            axis.title.x=element_blank(),
            axis.ticks = element_blank(),
            rect = element_blank()) + #plot.margin = unit(c(0.1,0.1,0.1,0.1), "cm") 
      scale_fill_gradient(low = "#fad3d3", high = "#da0000")
  }
}

ggs <- arrangeGrob(arrangeGrob(gg.list[[1]], top = comp.titles[1], left = frame.titles[1]), 
                   arrangeGrob(gg.list[[2]], top = comp.titles[2], left = ""),
                   arrangeGrob(gg.list[[3]], top = comp.titles[3], left = ""), 
                   arrangeGrob(gg.list[[4]], left = frame.titles[2]), 
                   arrangeGrob(gg.list[[5]], left = ""),
                   arrangeGrob(gg.list[[6]], left = ""),
                   arrangeGrob(gg.list[[7]], left = frame.titles[3]), 
                   arrangeGrob(gg.list[[8]], left = ""), 
                   arrangeGrob(gg.list[[9]], left = ""), 
                   ncol = 3, nrow = 3)
# ggsave(filename = "figures/pred.map.2022.comp.gg.pdf", plot = ggs, width = 6, height = 7)


